"""
Data acquisition module for Demographics & Capital Flows project.

Downloads and caches data from:
- UN World Population Prospects 2024 (demographics)
- IMF WEO (macro fundamentals)
- Penn World Tables 10.01 (output per worker, etc.)
- World Bank WDI (health expenditure)
- Chinn-Ito KAOPEN (financial openness)
- Lane & Milesi-Ferretti EWN (NFA/GDP)
- FRED (interest rates for OECD countries)
- IMF IFS (interest rates for developing countries)
"""

import os
import io
import zipfile
import requests
import pandas as pd
import numpy as np
from pathlib import Path

RAW_DIR = Path("/mnt/c/demographics_capital_flows/multilateral/data/raw")
PROCESSED_DIR = Path("/mnt/c/demographics_capital_flows/multilateral/data/processed")


def _ensure_dirs():
    RAW_DIR.mkdir(parents=True, exist_ok=True)
    PROCESSED_DIR.mkdir(parents=True, exist_ok=True)


# ---------------------------------------------------------------------------
# 1. UN World Population Prospects 2024
# ---------------------------------------------------------------------------

def download_un_wpp(force=False):
    """Download UN WPP 2024 population by 5-year age group."""
    outfile = RAW_DIR / "un_wpp_population_by_age.csv"
    if outfile.exists() and not force:
        print(f"UN WPP data already exists at {outfile}")
        return pd.read_csv(outfile, low_memory=False)

    _ensure_dirs()
    print("Downloading UN WPP 2024 population by age group...")

    # Bulk CSV download — updated URL structure (2024+)
    urls_to_try = [
        "https://population.un.org/wpp/assets/Excel%20Files/1_Indicator%20(Standard)/CSV_FILES/WPP2024_Population1JanuaryByAge5GroupSex_Medium.csv.gz",
        "https://population.un.org/wpp/assets/Excel%20Files/1_Indicator%20(Standard)/CSV_FILES/WPP2024_PopulationByAge5GroupSex_Medium.csv.gz",
    ]

    for url in urls_to_try:
        try:
            print(f"  Trying {url}...")
            resp = requests.get(url, timeout=300, stream=True)
            if resp.status_code == 200:
                gz_file = RAW_DIR / "un_wpp_pop.csv.gz"
                total = 0
                with open(gz_file, 'wb') as f:
                    for chunk in resp.iter_content(chunk_size=65536):
                        f.write(chunk)
                        total += len(chunk)
                print(f"  Downloaded {total/1e6:.1f} MB")
                df = pd.read_csv(gz_file, compression='gzip', low_memory=False)
                df.to_csv(outfile, index=False)
                print(f"  Saved {len(df)} records to {outfile}")
                return df
            else:
                print(f"  Status {resp.status_code}")
        except Exception as e:
            print(f"  Failed: {e}")

    print("  Could not download UN WPP data automatically.")
    print("  Please download from: https://population.un.org/wpp/downloads")
    print(f"  Save as: {outfile}")
    return None


# ---------------------------------------------------------------------------
# 2. IMF World Economic Outlook
# ---------------------------------------------------------------------------

def download_weo(force=False):
    """Download latest WEO data using the weo package."""
    outfile = RAW_DIR / "weo_data.csv"
    if outfile.exists() and not force:
        print(f"WEO data already exists at {outfile}")
        return pd.read_csv(outfile)

    _ensure_dirs()
    print("Downloading IMF WEO data...")
    import weo

    # Download the latest release
    year, release = 2025, 1
    weo_file = RAW_DIR / f"weo_{year}_{release}.csv"

    try:
        weo.download(year, release, filename=str(weo_file))
    except Exception as e:
        print(f"  Failed to download WEO {year}-{release}: {e}")
        # Try previous release
        year, release = 2024, 2
        weo_file = RAW_DIR / f"weo_{year}_{release}.csv"
        weo.download(year, release, filename=str(weo_file))

    w = weo.WEO(str(weo_file))

    # Extract key variables
    codes = {
        'BCA_NGDPD': 'ca_gdp',           # Current account / GDP
        'GGXCNL_NGDP': 'fiscal_bal_gdp',  # Fiscal balance / GDP
        'NGDP_RPCH': 'rgdp_growth',        # Real GDP growth
        'NGAP_NPGDP': 'output_gap',        # Output gap
        'PCPIPCH': 'inflation',             # CPI inflation
        'NGDPD': 'ngdp_usd',              # Nominal GDP, USD billions
        'PPPGDP': 'gdp_ppp',              # GDP PPP
        'LP': 'population_weo',             # Population (millions)
        'BCA': 'ca_usd',                   # Current account, USD billions
    }

    frames = []
    for code, name in codes.items():
        try:
            df_var = w.getc(code)
            # WEO getc returns wide: PeriodIndex rows (years), country ISO3 columns
            df_var = df_var.reset_index()
            # The index column is named 'index' or similar after reset
            idx_col = df_var.columns[0]
            df_long = df_var.melt(
                id_vars=[idx_col], var_name='iso3', value_name=name
            )
            df_long['year'] = df_long[idx_col].astype(str).str[:4].astype(int)
            df_long = df_long.drop(columns=[idx_col])
            frames.append(df_long)
            print(f"  Got {code} ({name}): {df_long[name].notna().sum():,} non-null values")
        except Exception as e:
            print(f"  Warning: Could not get {code} ({name}): {e}")

    if frames:
        df = frames[0]
        for f in frames[1:]:
            df = df.merge(f, on=['iso3', 'year'], how='outer')

        # Convert numeric columns
        for col in df.columns:
            if col not in ['iso3', 'year']:
                df[col] = pd.to_numeric(df[col], errors='coerce')

        df.to_csv(outfile, index=False)
        print(f"  Saved WEO data: {df.shape}")
        return df

    return None


# ---------------------------------------------------------------------------
# 3. Penn World Tables
# ---------------------------------------------------------------------------

def download_pwt(force=False):
    """Download Penn World Tables 10.01."""
    outfile = RAW_DIR / "pwt1001.xlsx"
    if outfile.exists() and not force:
        print(f"PWT data already exists at {outfile}")
        return pd.read_excel(outfile, sheet_name='Data')

    _ensure_dirs()
    print("Downloading Penn World Tables 10.01...")

    urls = [
        "https://dataverse.nl/api/access/datafile/354095",
        "https://dataverse.nl/api/access/datafile/354098",
        "https://www.rug.nl/ggdc/docs/pwt1001.xlsx",
    ]

    for url in urls:
        try:
            print(f"  Trying {url}...")
            resp = requests.get(url, timeout=120, stream=True)
            if resp.status_code == 200:
                total = 0
                with open(outfile, 'wb') as f:
                    for chunk in resp.iter_content(chunk_size=65536):
                        f.write(chunk)
                        total += len(chunk)
                print(f"  Downloaded {total/1e6:.1f} MB")
                df = pd.read_excel(outfile, sheet_name='Data')
                print(f"  Saved PWT data: {df.shape}")
                return df
        except Exception as e:
            print(f"  Failed: {e}")

    print("  Could not download PWT automatically.")
    print("  Please download from: https://www.rug.nl/ggdc/productivity/pwt/")
    print(f"  Save as: {outfile}")
    return None


# ---------------------------------------------------------------------------
# 4. World Bank WDI
# ---------------------------------------------------------------------------

def download_wdi(force=False):
    """Download World Bank WDI indicators via wbgapi."""
    outfile = RAW_DIR / "wdi_data.csv"
    if outfile.exists() and not force:
        print(f"WDI data already exists at {outfile}")
        return pd.read_csv(outfile)

    _ensure_dirs()
    print("Downloading World Bank WDI data...")
    import wbgapi as wb

    indicators = {
        'SH.XPD.GHED.GD.ZS': 'health_exp_gdp',     # Public health expenditure % GDP
        'SP.DYN.LE00.IN': 'life_expectancy',          # Life expectancy at birth
        'NY.GDP.PCAP.PP.KD': 'gdp_pc_ppp',           # GDP per capita, PPP (constant)
        'BN.CAB.XOKA.GD.ZS': 'ca_gdp_wb',           # Current account % GDP (WB)
        'NE.TRD.GNFS.ZS': 'trade_openness',          # Trade % GDP
        'FP.CPI.TOTL.ZG': 'cpi_inflation_wb',        # CPI inflation
    }

    frames = []
    for ind_code, var_name in indicators.items():
        try:
            print(f"  Fetching {var_name} ({ind_code})...")
            data = wb.data.DataFrame(ind_code, time=range(1970, 2025), labels=False)
            data = data.reset_index()
            data = data.melt(id_vars=['economy'], var_name='year', value_name=var_name)
            data['year'] = data['year'].str.replace('YR', '').astype(int)
            data = data.rename(columns={'economy': 'iso3'})
            frames.append(data)
        except Exception as e:
            print(f"  Warning: Could not get {ind_code}: {e}")

    if frames:
        df = frames[0]
        for f in frames[1:]:
            df = df.merge(f, on=['iso3', 'year'], how='outer')
        df.to_csv(outfile, index=False)
        print(f"  Saved WDI data: {df.shape}")
        return df

    return None


# ---------------------------------------------------------------------------
# 5. Chinn-Ito KAOPEN Index
# ---------------------------------------------------------------------------

def download_kaopen(force=False):
    """Download Chinn-Ito KAOPEN financial openness index."""
    outfile = RAW_DIR / "kaopen.csv"
    if outfile.exists() and not force:
        print(f"KAOPEN data already exists at {outfile}")
        return pd.read_csv(outfile)

    _ensure_dirs()
    print("Downloading Chinn-Ito KAOPEN index...")

    # Try downloading the Excel file from the Chinn-Ito website
    urls = [
        "https://web.pdx.edu/~ito/kaopen_2023.xls",
        "https://web.pdx.edu/~ito/kaopen_2023.xlsx",
        "https://web.pdx.edu/~ito/kaopen_2022.xls",
    ]

    for url in urls:
        try:
            print(f"  Trying {url}...")
            resp = requests.get(url, timeout=60)
            if resp.status_code == 200:
                ext = url.rsplit('.', 1)[-1]
                local_file = RAW_DIR / f"kaopen.{ext}"
                with open(local_file, 'wb') as f:
                    f.write(resp.content)
                df = pd.read_excel(local_file)
                # Standardize column names
                col_map = {}
                for c in df.columns:
                    cl = c.lower().strip()
                    if 'iso' in cl and '3' not in cl and 'code' in cl:
                        col_map[c] = 'iso2'
                    elif cl in ('ccode', 'country_code'):
                        col_map[c] = 'ccode'
                    elif cl == 'year':
                        col_map[c] = 'year'
                    elif 'kaopen' in cl or cl == 'ka_open':
                        col_map[c] = 'kaopen'
                    elif cl == 'country_name' or cl == 'country':
                        col_map[c] = 'country'

                df = df.rename(columns=col_map)
                df.to_csv(outfile, index=False)
                print(f"  Saved KAOPEN data: {df.shape}")
                return df
        except Exception as e:
            print(f"  Failed: {e}")

    print("  Could not download KAOPEN automatically.")
    print("  Please download from: https://web.pdx.edu/~ito/Chinn-Ito_website.htm")
    return None


# ---------------------------------------------------------------------------
# 6. Lane & Milesi-Ferretti External Wealth of Nations
# ---------------------------------------------------------------------------

def download_ewn(force=False):
    """Download Lane & Milesi-Ferretti External Wealth of Nations dataset."""
    outfile = RAW_DIR / "ewn.csv"
    if outfile.exists() and not force:
        print(f"EWN data already exists at {outfile}")
        return pd.read_csv(outfile)

    _ensure_dirs()
    print("Downloading Lane & Milesi-Ferretti EWN dataset...")

    urls = [
        "https://www.brookings.edu/wp-content/uploads/2026/02/EWN-dataset-year-end-2024_feb06.xlsx",
        "https://www.brookings.edu/wp-content/uploads/2023/12/EWN-dataset-Dec2023.xlsx",
        "https://www.brookings.edu/wp-content/uploads/2022/12/EWN-dataset-Dec2022.xlsx",
    ]

    for url in urls:
        try:
            print(f"  Trying {url}...")
            resp = requests.get(url, timeout=120)
            if resp.status_code == 200:
                xlsx_file = RAW_DIR / "ewn.xlsx"
                with open(xlsx_file, 'wb') as f:
                    f.write(resp.content)
                # Data is typically in 'Dataset' sheet
                try:
                    df = pd.read_excel(xlsx_file, sheet_name='Dataset')
                except Exception:
                    df = pd.read_excel(xlsx_file)
                df.to_csv(outfile, index=False)
                print(f"  Saved EWN data: {df.shape}")
                return df
        except Exception as e:
            print(f"  Failed: {e}")

    print("  Could not download EWN automatically.")
    print("  Please download from: https://www.brookings.edu/articles/the-external-wealth-of-nations/")
    return None


# ---------------------------------------------------------------------------
# 7. FRED Interest Rates (OECD countries)
# ---------------------------------------------------------------------------

def download_fred_rates(api_key, force=False):
    """Download government bond yields and short rates from FRED."""
    outfile = RAW_DIR / "fred_rates.csv"
    if outfile.exists() and not force:
        print(f"FRED rates already exist at {outfile}")
        return pd.read_csv(outfile)

    _ensure_dirs()
    print("Downloading interest rates from FRED...")
    from fredapi import Fred
    fred = Fred(api_key=api_key)

    # OECD country codes for FRED series
    countries = {
        'USA': 'US', 'GBR': 'GB', 'DEU': 'DE', 'FRA': 'FR', 'JPN': 'JP',
        'CAN': 'CA', 'AUS': 'AU', 'ITA': 'IT', 'ESP': 'ES', 'NLD': 'NL',
        'BEL': 'BE', 'AUT': 'AT', 'CHE': 'CH', 'SWE': 'SE', 'NOR': 'NO',
        'DNK': 'DK', 'FIN': 'FI', 'IRL': 'IE', 'PRT': 'PT', 'GRC': 'GR',
        'NZL': 'NZ', 'KOR': 'KR', 'MEX': 'MX', 'TUR': 'TR', 'POL': 'PL',
        'CZE': 'CZ', 'HUN': 'HU', 'ISR': 'IL', 'CHL': 'CL', 'ZAF': 'ZA',
        'COL': 'CO', 'ISL': 'IS', 'LUX': 'LU', 'SVK': 'SK', 'SVN': 'SI',
        'EST': 'EE', 'LVA': 'LV', 'LTU': 'LT',
    }

    records = []
    for iso3, cc in countries.items():
        # 10-year government bond yield
        for series_template, var_name in [
            (f"IRLTLT01{cc}M156N", 'govt_bond_10y'),
            (f"IR3TIB01{cc}M156N", 'short_rate_3m'),
        ]:
            try:
                s = fred.get_series(series_template, observation_start='1970-01-01')
                if s is not None and len(s) > 0:
                    df_temp = s.resample('YE').mean().reset_index()
                    df_temp.columns = ['date', var_name]
                    df_temp['iso3'] = iso3
                    df_temp['year'] = df_temp['date'].dt.year
                    records.append(df_temp[['iso3', 'year', var_name]])
                    print(f"  Got {var_name} for {iso3}: {len(df_temp)} years")
            except Exception as e:
                # Try alternative series names
                pass

    if records:
        # Concat all records and aggregate by iso3-year
        df = pd.concat(records, ignore_index=True)
        df = df.groupby(['iso3', 'year']).first().reset_index()
        df.to_csv(outfile, index=False)
        print(f"  Saved FRED rates: {df.shape}")
        return df

    return None


# ---------------------------------------------------------------------------
# 8. IMF IFS Interest Rates (broader country coverage)
# ---------------------------------------------------------------------------

def download_imf_ifs_rates(force=False):
    """Download interest rates from IMF IFS for broader country coverage."""
    outfile = RAW_DIR / "imf_ifs_rates.csv"
    if outfile.exists() and not force:
        print(f"IMF IFS rates already exist at {outfile}")
        return pd.read_csv(outfile)

    _ensure_dirs()
    print("Downloading interest rates from IMF IFS...")
    import imfp

    try:
        # MFS_IR = Monetary and Financial Statistics, Interest Rates
        # (IFS was restructured into topic-specific datasets)
        indicators = [
            'MMRT_RT_PT_A_PT',           # Money market rate
            'MFS166_RT_PT_A_PT',         # Policy rate
            'MFS162_RT_PT_A_PT',         # Lending rate
            'S13BONDSML_RT_PT_A_PT',     # Government bond yield (medium & long term)
        ]

        frames = []
        for ind in indicators:
            try:
                print(f"  Fetching MFS_IR indicator {ind}...")
                df = imfp.imf_dataset(
                    "MFS_IR",
                    indicator=[ind],
                    frequency=['A'],
                    start_year=1970,
                    end_year=2024
                )
                if df is not None and len(df) > 0:
                    ind_names = {
                        'MMRT_RT_PT_A_PT': 'money_market_rate',
                        'MFS166_RT_PT_A_PT': 'policy_rate',
                        'MFS162_RT_PT_A_PT': 'lending_rate',
                        'S13BONDSML_RT_PT_A_PT': 'govt_bond_yield',
                    }
                    var_name = ind_names.get(ind, ind.lower())
                    # Column names: country, indicator, frequency, time_period, obs_value
                    df = df.rename(columns={
                        'country': 'iso3',
                        'time_period': 'year',
                        'obs_value': var_name
                    })
                    df['year'] = pd.to_numeric(df['year'], errors='coerce')
                    df[var_name] = pd.to_numeric(df[var_name], errors='coerce')
                    frames.append(df[['iso3', 'year', var_name]].dropna())
                    print(f"    Got {len(df)} records, {df['iso3'].nunique()} countries")
            except Exception as e:
                print(f"  Warning: Could not get {ind}: {e}")

        if frames:
            df = frames[0]
            for f in frames[1:]:
                merge_key = list(set(df.columns) & set(f.columns) & {'iso3', 'iso2', 'year'})
                df = df.merge(f, on=merge_key, how='outer')
            df.to_csv(outfile, index=False)
            print(f"  Saved IMF IFS rates: {df.shape}")
            return df

    except Exception as e:
        print(f"  Failed to download IMF IFS data: {e}")

    return None


# ---------------------------------------------------------------------------
# 9. WDI Savings & Investment
# ---------------------------------------------------------------------------

def download_wdi_savings(force=False):
    """Download gross savings and investment rates from World Bank WDI."""
    outfile = RAW_DIR / "wdi_savings_investment.csv"
    if outfile.exists() and not force:
        print(f"WDI savings/investment data already exists at {outfile}")
        return pd.read_csv(outfile)

    _ensure_dirs()
    print("Downloading WDI savings/investment indicators...")

    try:
        import wbgapi as wb
    except ImportError:
        print("  wbgapi not installed, skipping WDI savings download")
        return None

    indicators = {
        'NY.GDS.TOTL.ZS': 'gross_savings_gdp',
        'NY.GNS.ICTR.ZS': 'gross_national_savings_gdp',
        'NE.GDI.TOTL.ZS': 'gross_investment_gdp',
        'NE.GDI.FTOT.ZS': 'gross_fixed_investment_gdp',
    }

    all_data = []
    for ind_id, ind_name in indicators.items():
        try:
            df = wb.data.DataFrame(
                ind_id, economy='all', time=range(1960, 2025),
                labels=False, skipBlanks=True,
            ).reset_index()
            df_long = df.melt(id_vars=['economy'], var_name='yr', value_name=ind_name)
            df_long['iso3'] = df_long['economy']
            df_long['year'] = pd.to_numeric(df_long['yr'].str.replace('YR', ''), errors='coerce')
            df_long = df_long[['iso3', 'year', ind_name]].dropna()
            all_data.append(df_long)
            print(f"  {ind_name}: {len(df_long)} obs, {df_long['iso3'].nunique()} countries")
        except Exception as e:
            print(f"  {ind_name}: FAILED - {e}")

    if not all_data:
        print("  No savings/investment data could be downloaded")
        return None

    result = all_data[0]
    for df in all_data[1:]:
        result = result.merge(df, on=['iso3', 'year'], how='outer')

    result = result.sort_values(['iso3', 'year']).reset_index(drop=True)
    result.to_csv(outfile, index=False)
    print(f"  Saved: {outfile} ({len(result):,} rows, {result['iso3'].nunique()} countries)")
    return result


# ---------------------------------------------------------------------------
# 10. OECD Pension Data (Social Expenditure + Replacement Rates)
# ---------------------------------------------------------------------------

def download_oecd_pensions(force=False):
    """
    Download pension indicators from OECD SOCX and World Bank.

    Sources:
    - OECD SOCX: Public old-age spending as % of GDP (43 OECD countries, 2000-2024)
    - World Bank: Social insurance coverage (118 countries, sparser)
    """
    outfile = RAW_DIR / "oecd_pensions.csv"
    if outfile.exists() and not force:
        print(f"Pension data already exists at {outfile}")
        return pd.read_csv(outfile)

    _ensure_dirs()
    print("Downloading pension data...")

    records = []
    headers = {'User-Agent': 'Mozilla/5.0 (demographics-capital-flows)'}

    # --- OECD SOCX: Old-age spending as % of GDP ---
    print("  Fetching OECD SOCX old-age spending...")
    socx_url = (
        "https://sdmx.oecd.org/public/rest/data/"
        "OECD.ELS.SPD,DSD_SOCX_AGG@DF_SOCX_AGG,/"
        ".A..PT_B1GQ.ES10._T._T.?"
        "startPeriod=1980&dimensionAtObservation=AllDimensions"
        "&format=csvfilewithlabels"
    )
    try:
        resp = requests.get(socx_url, headers=headers, timeout=90)
        resp.raise_for_status()
        socx = pd.read_csv(io.StringIO(resp.text))
        print(f"    SOCX raw: {socx.shape}")

        # Find columns by pattern
        ref_col = [c for c in socx.columns if 'REF_AREA' in c.upper()]
        time_col = [c for c in socx.columns if 'TIME_PERIOD' in c.upper()]
        val_col = [c for c in socx.columns if 'OBS_VALUE' in c.upper()]

        if ref_col and time_col and val_col:
            socx_clean = socx[[ref_col[0], time_col[0], val_col[0]]].copy()
            socx_clean.columns = ['iso3', 'year', 'pension_spending_gdp']
            socx_clean['year'] = pd.to_numeric(socx_clean['year'], errors='coerce')
            socx_clean['pension_spending_gdp'] = pd.to_numeric(
                socx_clean['pension_spending_gdp'], errors='coerce')
            # Drop aggregates like 'OECD', 'EU27', etc.
            socx_clean = socx_clean[socx_clean['iso3'].str.len() == 3]
            socx_clean = socx_clean.dropna()
            # Average if duplicates per country-year
            socx_clean = socx_clean.groupby(['iso3', 'year'], as_index=False).mean()
            records.append(socx_clean)
            print(f"    SOCX clean: {len(socx_clean)} obs, "
                  f"{socx_clean['iso3'].nunique()} countries, "
                  f"{socx_clean['year'].min():.0f}-{socx_clean['year'].max():.0f}")
        else:
            print(f"    SOCX columns not matched. Available: {socx.columns.tolist()[:10]}")
    except Exception as e:
        print(f"    SOCX download failed: {e}")

    # --- World Bank: Social insurance coverage ---
    print("  Fetching World Bank social insurance coverage...")
    try:
        import wbgapi as wb
        wb_data = wb.data.DataFrame(
            'per_si_allsi.cov_pop_tot',
            economy='all',
            time=range(1990, 2025),
            labels=False,
            skipBlanks=True,
        )
        # wb returns wide format: index=country, columns=YRxxxx
        wb_data = wb_data.reset_index()
        wb_long = wb_data.melt(
            id_vars=['economy'],
            var_name='year_str',
            value_name='pension_coverage'
        )
        wb_long['iso3'] = wb_long['economy']
        wb_long['year'] = pd.to_numeric(
            wb_long['year_str'].str.replace('YR', ''), errors='coerce')
        wb_long = wb_long[['iso3', 'year', 'pension_coverage']].dropna()
        records.append(wb_long)
        print(f"    WB clean: {len(wb_long)} obs, "
              f"{wb_long['iso3'].nunique()} countries")
    except ImportError:
        print("    wbgapi not installed, trying REST API...")
        try:
            wb_url = (
                "https://api.worldbank.org/v2/country/all/indicator/"
                "per_si_allsi.cov_pop_tot?format=json&per_page=10000&date=1990:2024"
            )
            resp = requests.get(wb_url, headers=headers, timeout=60)
            resp.raise_for_status()
            data = resp.json()
            if len(data) > 1:
                wb_rows = []
                for entry in data[1]:
                    if entry.get('value') is not None:
                        wb_rows.append({
                            'iso3': entry['country']['id'],
                            'year': int(entry['date']),
                            'pension_coverage': float(entry['value']),
                        })
                wb_df = pd.DataFrame(wb_rows)
                records.append(wb_df)
                print(f"    WB REST clean: {len(wb_df)} obs, "
                      f"{wb_df['iso3'].nunique()} countries")
        except Exception as e2:
            print(f"    WB REST also failed: {e2}")
    except Exception as e:
        print(f"    WB download failed: {e}")

    # Merge all pension sources
    if not records:
        print("  No pension data could be downloaded")
        return None

    result = records[0]
    for df in records[1:]:
        result = result.merge(df, on=['iso3', 'year'], how='outer')

    result = result.sort_values(['iso3', 'year']).reset_index(drop=True)
    result.to_csv(outfile, index=False)
    print(f"  Saved: {outfile} ({len(result):,} rows, "
          f"{result['iso3'].nunique()} countries)")
    return result


def download_all(fred_api_key=None, force=False):
    """Download all data sources. Continues on individual failures."""
    results = {}

    download_fns = [
        ('un_wpp', lambda: download_un_wpp(force=force)),
        ('weo', lambda: download_weo(force=force)),
        ('pwt', lambda: download_pwt(force=force)),
        ('wdi', lambda: download_wdi(force=force)),
        ('kaopen', lambda: download_kaopen(force=force)),
        ('ewn', lambda: download_ewn(force=force)),
    ]

    for name, fn in download_fns:
        try:
            results[name] = fn()
        except Exception as e:
            print(f"  ERROR downloading {name}: {e}")
            results[name] = None

    if fred_api_key:
        try:
            results['fred'] = download_fred_rates(fred_api_key, force=force)
        except Exception as e:
            print(f"  ERROR downloading FRED: {e}")
            results['fred'] = None
    else:
        print("Skipping FRED download (no API key provided)")

    try:
        results['imf_ifs'] = download_imf_ifs_rates(force=force)
    except Exception as e:
        print(f"  ERROR downloading IMF IFS: {e}")
        results['imf_ifs'] = None

    try:
        results['pensions'] = download_oecd_pensions(force=force)
    except Exception as e:
        print(f"  ERROR downloading pensions: {e}")
        results['pensions'] = None

    try:
        results['wdi_savings'] = download_wdi_savings(force=force)
    except Exception as e:
        print(f"  ERROR downloading WDI savings: {e}")
        results['wdi_savings'] = None

    # Summary
    print("\n=== Download Summary ===")
    for name, df in results.items():
        if df is not None:
            print(f"  {name}: {df.shape[0]:,} rows × {df.shape[1]} cols")
        else:
            print(f"  {name}: FAILED")

    return results


if __name__ == "__main__":
    import sys
    api_key = sys.argv[1] if len(sys.argv) > 1 else None
    download_all(fred_api_key=api_key)
